loads libraries
library(here)
library(rjags)
library(ggplot2)
library(tidyverse)
library(gNanoPkg)
Initialise data
gNano.df = readData()
Specify where the results are stored relative to the gNano project and this file.
jamesDuncan = TRUE
resultsRoot = if(jamesDuncan){
"../systematic/"
}else{
"../gNanoPkg/systematic/"
}
loadResults = function(model = c(paste0("g-",0:4), paste0("ln-",0:4)),
summary = FALSE){
model = match.arg(model)
resultsFile = glue("{resultsRoot}{model}.Rda")
results = load(resultsFile)
if(summary){
return(simSummary)
}
return(as.data.frame(sim.sample[[1]]))
}
Model \(g_0\)
Model: \[ y_{c,l,a} \sim \Gamma(r,s) \] If \[ \mathrm{E}[y_{c,l,a}]) = \log(\mu)\mbox{ and }\mathrm{Var}[y_{c,l,a}] = \sigma^2 \] then we let \[ r = \frac{\mu^2}{\sigma^2}\mbox{ and }s = \frac{\mu}{\sigma^2} \] Our prior on \(\sigma^2\) is \(inverse-\Gamma(0.001, 0.001)\), and our prior on \(\log(\mu)\) is \(N(0, 10^6)\).
First I will load up the results.
g0.df = loadResults("g-0")
I’ll do this with ggplot2 so I can teach myself something
p = g0.df %>%
ggplot(aes(x = tau)) +
geom_density()
p
We want plots of shape and rate, so I will create them in the data frame first as we didn’t store them during the sampling
g0.df = g0.df %>%
mutate(shape = exp(log.Mu)^2 * tau) %>%
mutate(rate = exp(log.Mu) * tau)
p = g0.df %>%
ggplot(aes(x = shape))+
geom_density()
p
p = g0.df %>%
ggplot(aes(x = rate))+
geom_density()
p
rateDensity = density(g0.df$rate)
shapeDensity = density(g0.df$shape)
rateMode = rateDensity$x[which.max(rateDensity$y)]
shapeMode = shapeDensity$x[which.max(shapeDensity$y)]
density.df = data.frame(x = seq(1, 30000, 1)) %>%
mutate(y = dgamma(x, rate = rateMode, shape = shapeMode))
p = gNano.df %>%
ggplot(aes(x = obs, y = stat(density))) +
geom_histogram(binwidth = 400) +
geom_line(data = density.df, aes(x, y))
p
Plot the observed vs the expected peak heights
expected = g0.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(expected))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Model \(g_1\)
Let’s have a look at the profile effects. I like to use error bar plots
g1.df = loadResults("g-1")
This code is not elegant, and is probably stupid but then so is the tidyverse a lot of the time :-)
lb = g1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(quantile, prob = c(0.025)) %>%
unlist()
med = g1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(quantile, prob = c(0.5)) %>%
unlist()
ub = g1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(quantile, prob = c(0.975)) %>%
unlist()
quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>%
mutate(profile = 1:102)
p = ggplot(data = quantiles.df, aes(x = profile, y = quantiles.df$med)) +
geom_point(aes(col = "red"), show.legend = FALSE) +
geom_errorbar(aes(ymin = lb, ymax = ub)) +
geom_hline(aes(yintercept=0)) +
ylab(bquote(tau[c])) +
xlab("Profile") +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
aph = gNano.df %>%
group_by(prof) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = g1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~tau[c])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the observed vs the expected peak heights
expected = g1.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(expected))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Model \(g_2\)
g2.df = loadResults("g-2")
lb = g2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(quantile, prob = c(0.025)) %>%
unlist()
med = g2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(quantile, prob = c(0.5)) %>%
unlist()
ub = g2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(quantile, prob = c(0.975)) %>%
unlist()
quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>%
mutate(locus = 1:31)
p = ggplot(data = quantiles.df, aes(x = locus, y = quantiles.df$med)) +
geom_point(aes(col = "red"), show.legend = FALSE) +
geom_errorbar(aes(ymin = lb, ymax = ub)) +
geom_hline(aes(yintercept=0)) +
ylab(bquote(alpha[l])) +
xlab("Locus") +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the observed vs the expected peak heights
expected = g2.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(expected))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the locus effects by Average Peak Height:
aph = gNano.df %>%
group_by(loc) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = g2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~alpha[l])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Model \(g_2\)
Dye effects
g3.df = loadResults("g-3")
lb = g3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(quantile, prob = c(0.025)) %>%
unlist()
med = g3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(quantile, prob = c(0.5)) %>%
unlist()
ub = g3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(quantile, prob = c(0.975)) %>%
unlist()
quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>%
mutate(dye = 1:4)
p = ggplot(data = quantiles.df, aes(x = dye, y = quantiles.df$med)) +
geom_point(aes(col = "red"), show.legend = FALSE) +
geom_errorbar(aes(ymin = lb, ymax = ub)) +
geom_hline(aes(yintercept=0)) +
ylab(bquote(delta[f])) +
xlab("Dye") +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the dye effects by Average Peak Height - no fitted line here because with four points who cares:
aph = gNano.df %>%
group_by(dye) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = g3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~delta[f])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 3.296
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.095418
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.036101
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 3.296
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.095418
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.036101
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced
Plot the observed vs the expected peak heights
expected = g3.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(expected))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Is the added variance being used?
g4.df = loadResults("g-4")
g4.df = g4.df %>%
mutate(sigma.sq0 = 1 / tau0)
p = g4.df %>%
ggplot(aes(x = sigma.sq0/10^6)) +
geom_density() +
geom_vline(xintercept = quantile(g4.df$sigma.sq0/10^6, probs=0.025)) +
geom_vline(xintercept = quantile(g4.df$sigma.sq0/10^6, probs=0.975)) +
xlab(bquote(sigma[0]^2~(x10^6))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the observed vs the expected peak heights
expected.g4 = g4.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected.g4 = expected.g4, expected.g3 = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(expected.g4))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Is it doing anything?
p = plot.df %>%
ggplot(aes(x = log10(expected.g3), y = log10(expected.g4))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5)
p
I think it’s doing what it is supposed to.
Let’s try and look at the expected values slightly differently. I will work with the summary stats rather than the raw data
simSummary = loadResults("g-3", summary = TRUE)
g3.quantiles = as.data.frame(t(simSummary$quantiles)) %>%
select(starts_with("pred")) %>%
rownames_to_column %>%
gather(var, value, -rowname) %>%
spread(rowname, value) %>%
mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>%
arrange(var)
g3.stats = as.data.frame(t(simSummary$statistics)) %>%
select(starts_with("pred")) %>%
rownames_to_column %>%
gather(var, value, -rowname) %>%
spread(rowname, value) %>%
mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>%
arrange(var)
simSummary = loadResults("g-4", summary = TRUE)
g4.quantiles = as.data.frame(t(simSummary$quantiles)) %>%
select(starts_with("pred")) %>%
rownames_to_column %>%
gather(var, value, -rowname) %>%
spread(rowname, value) %>%
mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>%
arrange(var)
plot.df = bind_rows(g3.quantiles, g4.quantiles) %>%
mutate(model = rep(c("g3", "g4"), rep(nrow(g3.quantiles), 2))) %>%
mutate(obs = rep(gNano.df$obs, 2)) %>%
rename(med = `50%`, lb = `2.5%`, ub = `97.5%`) %>%
select(obs, model, lb, med, ub) %>%
arrange(obs)
p = plot.df %>%
ggplot(aes(x = log10(obs), y = log10(med))) +
facet_grid(vars(model)) +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
geom_point(show.legend = FALSE) +
geom_smooth() +
geom_line(aes(y = log10(lb)), col = "green") + geom_line(aes(y = log10(ub)), col = "red")
p
## Warning: Removed 35 rows containing non-finite values (stat_smooth).
## Warning: Removed 35 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_path).
Now we move on the the log normal models
Tau
ln0.df = loadResults("ln-0")
p = ln0.df %>%
ggplot(aes(x = tau)) +
geom_density()
p
Mu
p = ln0.df %>%
ggplot(aes(x = Mu)) +
geom_density()
p
Comparing expected and observed peak heights
gNano.df = readData()
expected = ln0.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Load the data
ln1.df = loadResults("ln-1")
Showing the per profile effects
lb = ln1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(quantile, prob = c(0.025)) %>%
unlist()
med = ln1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(quantile, prob = c(0.5)) %>%
unlist()
ub = ln1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(quantile, prob = c(0.975)) %>%
unlist()
quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>%
mutate(profile = 1:102)
p = ggplot(data = quantiles.df, aes(x = profile, y = quantiles.df$med)) +
geom_point(aes(col = "red"), show.legend = FALSE) +
geom_errorbar(aes(ymin = lb, ymax = ub)) +
geom_hline(aes(yintercept=0)) +
ylab(bquote(tau[c])) +
xlab("Profile") +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Now showing the relationship between profile aph and profile effects from model
aph = gNano.df %>%
group_by(prof) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = ln1.df %>%
select(starts_with("beta.profile")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~tau[c])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plotting observed vs expected peak heights
expected = ln1.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
ln2.df = loadResults("ln-2")
lb = ln2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(quantile, prob = c(0.025)) %>%
unlist()
med = ln2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(quantile, prob = c(0.5)) %>%
unlist()
ub = ln2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(quantile, prob = c(0.975)) %>%
unlist()
quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>%
mutate(locus = 1:31)
p = ggplot(data = quantiles.df, aes(x = locus, y = quantiles.df$med)) +
geom_point(aes(col = "red"), show.legend = FALSE) +
geom_errorbar(aes(ymin = lb, ymax = ub)) +
geom_hline(aes(yintercept=0)) +
ylab(bquote(alpha[l])) +
xlab("Locus") +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the observed vs the expected peak heights
expected = ln2.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the locus effects by Average Peak Height:
aph = gNano.df %>%
group_by(loc) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = ln2.df %>%
select(starts_with("alpha.locus")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~alpha[l])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Dye effects
ln3.df = loadResults("ln-3")
lb = ln3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(quantile, prob = c(0.025)) %>%
unlist()
med = ln3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(quantile, prob = c(0.5)) %>%
unlist()
ub = ln3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(quantile, prob = c(0.975)) %>%
unlist()
quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>%
mutate(dye = 1:4)
p = ggplot(data = quantiles.df, aes(x = dye, y = quantiles.df$med)) +
geom_point(aes(col = "red"), show.legend = FALSE) +
geom_errorbar(aes(ymin = lb, ymax = ub)) +
geom_hline(aes(yintercept=0)) +
ylab(bquote(delta[f])) +
xlab("Dye") +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the dye effects by Average Peak Height - no fitted line here because with four points who cares:
aph = gNano.df %>%
group_by(dye) %>%
summarise(aph = mean(log(obs), na.rm = TRUE)) %>%
pull(aph)
meanEffect = ln3.df %>%
select(starts_with("gamma.dye")) %>%
summarise_all(mean, na.rm = TRUE) %>%
unlist()
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>%
ggplot(aes(x = aph, y = meanEffect)) +
geom_point(show.legend = FALSE) +
geom_smooth() +
xlab(expression(log[10]~(aph))) +
ylab(bquote(mean~delta[f])) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 3.296
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.095418
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.036101
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 3.296
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.095418
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.036101
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced
Plot the observed vs the expected peak heights
expected = ln3.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Is the added variance being used?
ln4.df = loadResults("ln-4")
ln4.df = ln4.df %>%
mutate(sigma.sq0 = 1 / tau0)
p = ln4.df %>%
ggplot(aes(x = sigma.sq0)) +
geom_density() +
geom_vline(xintercept = quantile(ln4.df$sigma.sq0, probs=0.025)) +
geom_vline(xintercept = quantile(ln4.df$sigma.sq0, probs=0.975)) +
xlab(bquote(sigma[0]^2)) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
Plot the observed vs the expected peak heights
expected.ln4 = ln4.df %>%
select(starts_with("pred")) %>%
summarise_all(mean) %>%
unlist()
plot.df = data.frame(observed = gNano.df$obs, expected.ln4 = expected.ln4)
p = plot.df %>%
ggplot(aes(x = log10(observed), y = log10(exp(expected.ln4)))) +
geom_point() +
geom_abline(aes(intercept = 0, slope = 1)) +
xlim(1, 5) +
ylim(1, 5) +
xlab(expression(log[10]~(observed~height))) +
ylab(expression(log[10]~(expected~height))) +
theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
compare the dlog(likelihoods) for the log-normal model 4 vs the gamma model 4
# You need the latest version of tidyr to make this work.
# The commands to do this are on the next line
# devtools::install_github("tidyverse/tidyr")
library(tidyr)
#loads the gamma model 4
g4.df = loadResults("g-4")
sr.df = g4.df %>%
select(c(starts_with("shape"), starts_with("rate")))
sr.df = sr.df %>%
pivot_longer(cols = everything(), names_to = "parameter")
nObs = length(gNano.df$obs)
sr.df = sr.df %>%
mutate(obs = as.numeric(gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))
sr.df = sr.df %>%
mutate(parameter = gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))
shape.df = sr.df %>%
filter(parameter == "shape")
rate.df = sr.df %>%
filter(parameter == "rate")
sr.df = data.frame(
obs = shape.df$obs,
y = rep(gNano.df$obs, 1000),
shape = shape.df$value,
rate = rate.df$value
)
sr.df = sr.df %>%
mutate(rep = rep(1:1000, rep(nObs, 1000)))
g4.ll = sr.df %>%
group_by(rep) %>%
summarise(ll = sum(dgamma(y, shape, rate, log = TRUE)))
p = g4.ll %>%
ggplot(aes(x = ll)) + geom_histogram()
p
#loads the log-normal model 4
ln4.df = loadResults("ln-4")
ms.df = ln4.df %>%
select(c(starts_with("mu", ignore.case = FALSE), matches("^tau([.]|\\[)[0-9]+([.]|\\])$")))
ms.df = ms.df %>%
pivot_longer(cols = everything(), names_to = "parameter")
nObs = length(gNano.df$obs)
ms.df = ms.df %>%
mutate(obs = as.numeric(gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))
ms.df = ms.df %>%
mutate(parameter = gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))
mu.df = ms.df %>%
filter(parameter == "mu")
sigma.df = ms.df %>%
filter(parameter == "tau") %>%
mutate(sigma = 1 / sqrt(value)) %>%
select(-value)
ms.df = data.frame(
obs = mu.df$obs,
y = rep(gNano.df$obs, 1000),
mu = mu.df$value,
sigma = sigma.df$sigma
)
ms.df = ms.df %>%
mutate(rep = rep(1:1000, rep(nObs, 1000)))
ln4.ll = ms.df %>%
group_by(rep) %>%
summarise(ll = sum(dlnorm(y, mu, sigma, log = TRUE)))
plot.df = left_join(g4.ll, ln4.ll, by = c("rep" = "rep")) %>%
rename(g4 = ll.x, ln4 = ll.y)
# %>%
# pivot_longer(cols = c(g4, ln4), names_to = "model")
#plots densities
p = ggplot(plot.df)
p = p + geom_density(aes(x = g4), col="blue")
p = p + geom_density(aes(x = ln4), col="red")
p = p + xlab(expression(log[10]~(likelihood)))
p = p + ylab("density")
p = p + theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"), legend.position = "right", legend.justification = "top-right")
p = p + scale_colour_manual(name="distribution", breaks =c("gamma", "log-normal"), values = c("blue", "red"))
p = p + scale_x_continuous(limits=c(-35600, -34600))
p
comparing the g3 model and the g4 model
# You need the latest version of tidyr to make this work.
# The commands to do this are on the next line
# devtools::install_github("tidyverse/tidyr")
library(tidyr)
#loads the gamma model 4
g4.df = loadResults("g-4")
sr.df = g4.df %>%
select(c(starts_with("shape"), starts_with("rate")))
sr.df = sr.df %>%
pivot_longer(cols = everything(), names_to = "parameter")
nObs = length(gNano.df$obs)
sr.df = sr.df %>%
mutate(obs = as.numeric(gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))
sr.df = sr.df %>%
mutate(parameter = gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))
shape.df = sr.df %>%
filter(parameter == "shape")
rate.df = sr.df %>%
filter(parameter == "rate")
sr.df = data.frame(
obs = shape.df$obs,
y = rep(gNano.df$obs, 1000),
shape = shape.df$value,
rate = rate.df$value
)
sr.df = sr.df %>%
mutate(rep = rep(1:1000, rep(nObs, 1000)))
g4.ll = sr.df %>%
group_by(rep) %>%
summarise(ll = sum(dgamma(y, shape, rate, log = TRUE)))
p = g4.ll %>%
ggplot(aes(x = ll)) + geom_histogram()
p
g3.df = loadResults("g-3")
sr.df = g3.df %>%
select(c(starts_with("shape"), starts_with("rate")))
sr.df = sr.df %>%
pivot_longer(cols = everything(), names_to = "parameter")
nObs = length(gNano.df$obs)
sr.df = sr.df %>%
mutate(obs = as.numeric(gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\2", parameter)))
## Warning: NAs introduced by coercion
sr.df = sr.df %>%
mutate(parameter = gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))
shape.df = sr.df %>%
filter(parameter == "shape")
rate.df = sr.df %>%
filter(parameter == "rate")
sr.df = data.frame(
obs = shape.df$obs,
y = rep(gNano.df$obs, 1000),
shape = shape.df$value,
rate = rate.df$value
)
sr.df = sr.df %>%
mutate(rep = rep(1:1000, rep(nObs, 1000)))
g3.ll = sr.df %>%
group_by(rep) %>%
summarise(ll = sum(dgamma(y, shape, rate, log = TRUE)))
p = g3.ll %>%
ggplot(aes(x = ll)) + geom_histogram()
p
plot.df = left_join(g4.ll, g3.ll, by = c("rep" = "rep")) %>%
rename(g4 = ll.x, g3 = ll.y)
# %>%
# pivot_longer(cols = c(g4, ln4), names_to = "model")
#plots densities
p = ggplot(plot.df)
p = p + geom_density(aes(x = g4), col="blue")
p = p + geom_density(aes(x = g3), col="red")
p = p + xlab(expression(log[10]~(likelihood)))
p = p + ylab("density")
p = p + theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"), legend.position = "right", legend.justification = "top-right")
p = p + scale_colour_manual(name="distribution", breaks =c("gamma4", "gamma3"), values = c("blue", "red"))
#p = p + scale_x_continuous(limits=c(-35600, -34600))
p
compare the ln3 and ln4 models
# You need the latest version of tidyr to make this work.
# The commands to do this are on the next line
# devtools::install_github("tidyverse/tidyr")
library(tidyr)
#loads the log-normal model 3
ln3.df = loadResults("ln-3")
ms.df = ln3.df %>%
select(c(starts_with("mu", ignore.case = FALSE), matches("^tau$")))
ms.df = ms.df %>%
pivot_longer(cols = everything(), names_to = "parameter")
sigma.df = ms.df %>%
filter(parameter == "tau") %>%
mutate(sigma = 1 / sqrt(value)) %>%
select(-value)
nObs = length(gNano.df$obs)
mu.df = ms.df %>%
filter(grepl("^mu", parameter)) %>%
mutate(obs = as.numeric(gsub("^mu([.]|\\[)([0-9]+)([.]|\\])$", "\\2", parameter))) %>%
select(-parameter)
ms.df = data.frame(
obs = mu.df$obs,
y = rep(gNano.df$obs, 1000),
mu = mu.df$value,
sigma = rep(sigma.df$sigma, rep(nObs, 1000))
)
ms.df = ms.df %>%
mutate(rep = rep(1:1000, rep(nObs, 1000)))
ln3.ll = ms.df %>%
group_by(rep) %>%
summarise(ll = sum(dlnorm(y, mu, sigma, log = TRUE)))
ln4.df = loadResults("ln-4")
ms.df = ln4.df %>%
select(c(starts_with("mu", ignore.case = FALSE), matches("^tau([.]|\\[)[0-9]+([.]|\\])$")))
ms.df = ms.df %>%
pivot_longer(cols = everything(), names_to = "parameter")
nObs = length(gNano.df$obs)
ms.df = ms.df %>%
mutate(obs = as.numeric(gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))
ms.df = ms.df %>%
mutate(parameter = gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))
mu.df = ms.df %>%
filter(parameter == "mu")
sigma.df = ms.df %>%
filter(parameter == "tau") %>%
mutate(sigma = 1 / sqrt(value)) %>%
select(-value)
ms.df = data.frame(
obs = mu.df$obs,
y = rep(gNano.df$obs, 1000),
mu = mu.df$value,
sigma = sigma.df$sigma
)
ms.df = ms.df %>%
mutate(rep = rep(1:1000, rep(nObs, 1000)))
ln4.ll = ms.df %>%
group_by(rep) %>%
summarise(ll = sum(dlnorm(y, mu, sigma, log = TRUE)))
plot.df = left_join(ln3.ll, ln4.ll, by = c("rep" = "rep")) %>%
rename(ln3 = ll.x, ln4 = ll.y)
# %>%
# pivot_longer(cols = c(g4, ln4), names_to = "model")
#plots densities
p = ggplot(plot.df)
p = p + geom_density(aes(x = ln4), col="blue")
p = p + geom_density(aes(x = ln3), col="red")
p = p + xlab(expression(log[10]~(likelihood)))
p = p + ylab("density")
p = p + theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"), legend.position = "right", legend.justification = "top-right")
p = p + scale_colour_manual(name="distribution", breaks =c("ln4", "ln3"), values = c("blue", "red"))
# p = p + scale_x_continuous(limits=c(-35600, -34600))
p